Companion notebook #4 of 4 of the paper
M. C. Soini et al., On the market displacement of incumbent grid-connected electricity storage by more efficient storage.
This notebook performs all analyses presented in section 3.3 of the paper. It is included, together with static HTML exports, in the paper's supplementary material and on GitHub.
Instructions
from dask.distributed import Client
client = Client(dashboard_address='127.0.0.1:41012', n_workers=10)
client
import sys, os
import re
from glob import glob
import itertools
import functools
import numpy as np
import pandas as pd
import dask.dataframe as dd
from dask import delayed
from fastparquet import ParquetFile
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from IPython.display import Image
import grimsel.auxiliary.maps as maps
# silence chained-assignment warnings
pd.set_option('mode.chained_assignment', None)
# Directory with model result parquet files
sc_out = 'out_grimsel/'
print_full = pd.option_context('display.max_rows', None,
'display.max_columns', None)
# conversion between ids and names of power plants, nodes, etc
mps = maps.Maps.from_parquet(sc_out)
# table defining model runs
def_run = pd.read_parquet(sc_out + 'def_run.parq')
# timemap table defining hours of the year
dict_mt2 = {mt: k for k, v in {'Summer': [4, 5, 6, 7, 8, 9],
'Winter': [0, 1, 2, 3, 10, 11]}.items() for mt in v}
df_tm_soy_full = (pd.read_parquet(sc_out + 'tm_soy_full.parq')
.assign(season2=lambda x: x.mt_id.replace(dict_mt2)))
# list of columns defining model variations
sw_cols = [c for c in def_run.columns if re.match('^sw.*vl$', c)]
def get_file_list(glob_in, slct_run_id=None):
    '''Filter a glob result to files whose zero-padded run_id suffix is in slct_run_id'''
    if slct_run_id is None:
        slct_run_id = def_run.run_id
    files = [fn for fn in glob_in
             if any(fn.endswith('{:04d}.parq'.format(run_id)) for run_id in slct_run_id)]
    return sorted(files)
def ddf_from_parquet(pattern, slct_run_id=None, as_df=False, columns=None):
'''Read all parquet files of a certain file name pattern with certain run_ids'''
@delayed
def load_chunk(pth, columns):
        return ParquetFile(pth).to_pandas(columns=columns)
files = get_file_list(glob(sc_out + pattern), slct_run_id)
assert len(files) > 0, f'Pattern {sc_out + pattern} yields empty file list.'
ddf = dd.from_delayed([load_chunk(f, columns=columns) for f in files])
if as_df:
ddf = ddf.compute()
return ddf
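# Hedged usage sketch (assumes the first two run_ids in def_run exist on disk):
# read the hourly power table straight into pandas, restricted to the columns
# needed downstream.
df_example = ddf_from_parquet('var_sy_pwr*',
                              slct_run_id=def_run.run_id.tolist()[:2],
                              columns=['sy', 'pp_id', 'value', 'run_id'],
                              as_df=True)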
def ddf_add_tm_cols(ddf, tm_cols):
'''Join timemap tables'''
return ddf.join(df_tm_soy_full.set_index('sy')[tm_cols], on='sy')
Reference run_ids (swst_vl == "000%") and value_diff of storage operation with respect to the reference case
# select subset of years
slct_run_id = def_run.query('swfy_vl in ["yr2015", "yr2035", "yr2050"]')
def join_swst_reference(ddf):
slct_col_map = list(set(sw_cols) - {'swst_vl'})
map_run_id = slct_run_id.join(slct_run_id.query('swst_vl == "000%"').set_index(slct_col_map).run_id.rename('run_id_ref'),
on=slct_col_map)[['run_id', 'run_id_ref']]
return ddf.join(map_run_id.set_index('run_id'), on='run_id'), map_run_id
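# Minimal sketch of the reference mapping on toy data (single switch column,
# not model output): each run is matched to the run with identical switches
# except swst_vl == "000%".
toy = pd.DataFrame({'run_id': [0, 1, 2, 3],
                    'swst_vl': ['000%', '050%', '000%', '050%'],
                    'swfy_vl': ['yr2015', 'yr2015', 'yr2050', 'yr2050']})
ref = toy.query('swst_vl == "000%"').set_index('swfy_vl').run_id.rename('run_id_ref')
print(toy.join(ref, on='swfy_vl'))  # run 1 -> ref 0, run 3 -> ref 2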
ddfpwr = ddf_from_parquet('var_sy_pwr*', slct_run_id=slct_run_id.run_id.tolist(),
columns=['sy', 'pp_id', 'value', 'bool_out', 'run_id'])
# only storage
ddfpwr = ddfpwr.loc[ddfpwr.pp_id.isin(mps.pp2pp('STO'))]
ddfpwr, map_run_id = join_swst_reference(ddfpwr)
## select reference values
ddfpwr_ref = ddfpwr.loc[ddfpwr.run_id == ddfpwr.run_id_ref].drop('run_id', axis=1).rename(columns={'value': 'value_ref'})
ddfpwr = dd.merge(ddfpwr.astype({'run_id_ref': np.int32}),
ddfpwr_ref.astype({'run_id_ref': np.int32}),
on=['sy', 'pp_id', 'run_id_ref', 'bool_out', ])
ddfpwr['value_diff'] = ddfpwr.value - ddfpwr.value_ref
ddfpwr['posflag'] = ddfpwr.value_diff > 0
dfpwrdiff_0 = ddfpwr.compute()
print('id_to_name')
dfpwrdiff_0 = mps.id_to_name(dfpwrdiff_0)
print('df_tm_soy_full')
dfpwrdiff_0 = dfpwrdiff_0.join(df_tm_soy_full.set_index('sy')[['hour', 'dow', 'dow_type', 'hom', 'doy', 'mt_id', 'season', 'season2']], on='sy')
dftotdch = (dfpwrdiff_0.loc[~dfpwrdiff_0.bool_out].groupby(['run_id', 'season2', 'pt'] + sw_cols).value.sum() * 1e-6).reset_index()
dfplot = dftotdch.loc[dftotdch.swfy_vl.isin(['yr2015', 'yr2035', 'yr2050'])]
dfplot['pt_fz'] = dfplot[['swfz_vl', 'pt']].apply('_'.join, axis=1)
fig = px.line(dfplot.query('swec_vl == "early_bat"')
.assign(ec_season=lambda x: x[['swec_vl', 'season2']].apply(tuple,axis=1).astype(str)),
x='swst_vl', y='value', hover_data=[],
facet_row=None, facet_col='swfy_vl', color='season2', line_group='pt_fz',
labels={'value': 'Discharged energy (TWh/yr)',
'swst_vl': 'New storage penetration (%)'})
fig
dfpwr = dfpwrdiff_0.loc[dfpwrdiff_0.swfy_vl.isin(['yr2015', 'yr2035', 'yr2050'])
& dfpwrdiff_0.swfz_vl.isin(['default'])
& dfpwrdiff_0.swst_vl.isin(["010%", "100%"])
# & dfpwr_0.pt.isin(['HYD_STO'])
]
dfpwrpv = dfpwr.groupby(['bool_out', 'pt', 'posflag', 'hour', 'run_id'] + sw_cols).value_diff.sum().reset_index()
fig = px.line(dfpwrpv.assign(line_group=lambda x: x[['bool_out', 'pt', 'run_id']].apply(tuple, axis=1).astype(str)),
x='hour', y='value_diff', hover_data=['bool_out'] + sw_cols,
facet_col='swfy_vl', facet_row='posflag', line_group='line_group', color='swec_vl')
fig.update_yaxes(matches=None)
ddfcap = ddf_from_parquet('par_cap_pwr_leg*', columns=['pp_id', 'run_id', 'value'])
ddfcap = ddfcap.loc[ddfcap.pp_id.isin(mps.pp2pp('STO'))]
dfcap = ddfcap.compute().rename(columns={'value': 'cap'})
# displacement per unit of installed storage power capacity and hour (dimensionless)
tm_cols = ['season2']
dfnhours = df_tm_soy_full.groupby(tm_cols).apply(len).rename('nhours')
dfcap_pt = dfcap.assign(pt_id=dfcap.pp_id.replace(mps.dict_plant_2_pp_type_id)).groupby(['pt_id', 'run_id']).cap.sum()
dfpwr_dispspec = (dfpwrdiff_0.loc[~dfpwrdiff_0.bool_out]
.groupby(['bool_out', 'run_id', 'pt_id', 'pt'] + tm_cols + sw_cols)[['value', 'value_diff', 'value_ref']]
.sum().reset_index()
.join(dfcap_pt, on=['pt_id', 'run_id'])
.join(dfnhours, on=dfnhours.index.names)
)
dfpwr_dispspec['dispspec_cap'] = (dfpwr_dispspec.value_diff / dfpwr_dispspec.cap / dfpwr_dispspec.nhours).fillna(0)
dfpwr_dispspec['cf'] = (dfpwr_dispspec.value / dfpwr_dispspec.cap / dfpwr_dispspec.nhours).fillna(0)
dfpwr_dispspec['pt_ec'] = dfpwr_dispspec[["pt", "swec_vl"]].apply('_'.join, axis=1)
dfpwr_dispspec['all_tm'] = dfpwr_dispspec[tm_cols].astype(str).apply('_'.join, axis=1)
dfpwr_dispspec.to_csv('DATA_OUT_CSV/table_dispspec_cap.csv')
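In formula form, the two metrics written to the table above are, per plant type and season: with $E$ the discharged energy (value), $\Delta E$ its difference to the reference run (value_diff), $C$ the installed power capacity (cap), and $N_h$ the number of hours in the season (nhours),
$$\mathrm{dispspec\_cap} = \frac{\Delta E}{C\,N_h}, \qquad \mathrm{cf} = \frac{E}{C\,N_h}.$$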
px.line(dfpwr_dispspec.query('swfy_vl in ["yr2015", "yr2050"] '
'and swec_vl == "early_phs"').sort_values(['swfy_vl', 'run_id', 'pt', ]),
x='swst_vl', y='dispspec_cap',
hover_data=['run_id'],
facet_col='swfz_vl', facet_row='swfy_vl', line_group='pt_ec', color='all_tm',
labels={'dispspec_cap': 'BAT capacity impact (-)', 'swst_vl': 'BAT penetration (%)'})
dfpwr_dispspec_erg_marg = (dfpwr_dispspec.pivot_table(index=['run_id', 'all_tm'] + tm_cols,
values='value', columns='pt', aggfunc='sum')
.reset_index().join(def_run.set_index('run_id')[sw_cols], on='run_id')
.assign(swst_vl=lambda x: x.swst_vl.apply(lambda y: int(y[:-1])))
.set_index(list(set(sw_cols) | set(tm_cols + ['all_tm', 'run_id'])))
.groupby(level=list((set(sw_cols) - {'swst_vl'}) | set(['all_tm'] + tm_cols))).apply(lambda df: df.diff())
.assign(dispspec_erg=lambda x: x.HYD_STO / x.NEW_STO)
.reset_index())
dfpwr_dispspec_erg_marg.to_csv('DATA_OUT_CSV/table_dispspec_marg.csv')
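The marginal energy impact written above is a finite difference between consecutive new-storage penetration steps $s$ (the groupby-diff in the cell above):
$$\mathrm{dispspec\_erg}(s) = \frac{E_{\mathrm{HYD\_STO}}(s) - E_{\mathrm{HYD\_STO}}(s-1)}{E_{\mathrm{NEW\_STO}}(s) - E_{\mathrm{NEW\_STO}}(s-1)}.$$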
cols = [c for c in dfpwr_dispspec_erg_marg.columns
        if c not in ['HYD_STO', 'NEW_STO', 'dispspec_erg', 'swst_vl', 'run_id', 'dispspec_erg_smooth']]
dfpwr_dispspec_erg_marg_smooth = (dfpwr_dispspec_erg_marg.groupby(cols)
.apply(lambda df: df.set_index('swst_vl')[['dispspec_erg']].sort_index().rolling(5, min_periods=0).mean())
.reset_index())
px.line(dfpwr_dispspec_erg_marg_smooth.query('swfy_vl in ["yr2015", "yr2050", "yr2035"] '
# ' and swst_vl <= 150 '
' and swfz_vl == "default"').sort_values(['swfy_vl','swst_vl']),
x='swst_vl', y='dispspec_erg', hover_data=[], color='all_tm',
facet_col='swfy_vl', facet_row='swfz_vl', line_group='swec_vl',
labels={'dispspec_erg': 'Marginal BAT energy impact (MWh<sub>PHS</sub>/MWh<sub>BAT</sub>)',
'swst_vl': 'BAT penetration (%)'})
dfpwr_dispspec_erg = (dfpwr_dispspec.pivot_table(index=['run_id', 'all_tm'],
values=['value_diff'],
columns='pt', aggfunc='sum')
.value_diff.assign(dispspec_erg=lambda x: x.HYD_STO / x.NEW_STO))
dfpwr_dispspec_erg = mps.id_to_name(dfpwr_dispspec_erg.reset_index())
dfpwr_dispspec_erg.to_csv('DATA_OUT_CSV/table_dispspec_erg.csv')
dfpwr_dispspec_erg['color'] = (dfpwr_dispspec_erg.swfz_vl + '_' + dfpwr_dispspec_erg.all_tm + '_' +
                               dfpwr_dispspec_erg.swec_vl)
px.line(dfpwr_dispspec_erg.query('swfy_vl in ["yr2015", "yr2035", "yr2050"] '
#' and swfz_vl == "default"'
).sort_values(['swfy_vl', 'run_id']),
x='swst_vl', y='dispspec_erg',
hover_data=['run_id'], color='color',
facet_col='swfy_vl', line_group='swec_vl',
labels={'dispspec_erg': 'Average BAT energy impact (MWh<sub>PHS</sub>/MWh<sub>BAT</sub>)',
'swst_vl': 'BAT penetration (%)'})
dfpwrdiff_hourly = (dfpwrdiff_0.query('swfy_vl in ["yr2015", "yr2050"] and swec_vl == "early_bat"')
.groupby(['hour', 'run_id', 'season2', 'fl', 'pt', 'dow_type', 'dow', 'bool_out'])
[['value', 'value_ref', 'value_diff']].sum().reset_index())
dfpwrdiff_hourly
dfpwrdiff_hourly = mps.id_to_name(dfpwrdiff_hourly)
dfpwrdiff_hourly_slct = dfpwrdiff_hourly.loc[dfpwrdiff_hourly.swfy_vl.isin(['yr2015', 'yr2050'])
& dfpwrdiff_hourly.swec_vl.isin(['early_bat'])
& dfpwrdiff_hourly.swfz_vl.isin(['default'])
& dfpwrdiff_hourly.pt.isin(['HYD_STO'])
& dfpwrdiff_hourly.dow_type.isin(['WEEKDAY'])
& dfpwrdiff_hourly.swst_vl.isin(['010%', '300%'])
& dfpwrdiff_hourly.fl.isin(['pumped_hydro'])]
dfpwrdiff_hourly_slct_agg = dfpwrdiff_hourly_slct.groupby(['swfy_vl', 'fl', 'swst_vl', 'season2', 'hour', 'dow_type', 'bool_out']).value_diff.mean().reset_index()
fig = px.line(dfpwrdiff_hourly_slct_agg,
x='hour', y='value_diff', hover_data=['fl'], color='swfy_vl',
facet_col='swst_vl', facet_row='bool_out', line_group='season2',
labels={'value_diff': 'PHS power reduction (MWh<sub>PHS</sub>)',
'hour': 'Hour of the day'})
fig
dfslct = dfpwrdiff_0.loc[dfpwrdiff_0.swst_vl.isin(['000%', '300%'])
& dfpwrdiff_0.swfz_vl.isin(['default'])
& dfpwrdiff_0.swec_vl.isin(['early_bat'])
& dfpwrdiff_0.swfy_vl.isin(['yr2035'])
]
dfplot = (dfslct.query('pp == "DE_HYD_STO"')
.pivot_table(index=['hom', 'bool_out', 'swst_vl'], values='value')
.reset_index())
px.line(dfplot, x='hom', y='value', hover_data=[], color='bool_out',
facet_col=None, facet_row='swst_vl', line_group='bool_out',
labels={'value': 'PHS energy (MWh<sub>PHS</sub>)',
'hom': 'Hour of the month'})
slct_run_id = def_run.query('swst_vl == "000%" and swec_vl == "early_phs"').run_id.tolist()
ddfdual = ddf_from_parquet('dual_supply*', columns=['sy', 'nd_id', 'value', 'run_id'], slct_run_id=slct_run_id)
ddfdual = ddfdual.compute().rename(columns={'value': 'price'})
ddfdmnd = ddf_from_parquet('par_dmnd*', columns=['sy', 'nd_id', 'value', 'run_id'], slct_run_id=slct_run_id)
ddfdmnd = ddfdmnd.compute().rename(columns={'value': 'dmnd'})
pddd = pd  # toggle: set to dd to keep the aggregation below lazy (dask) instead of eager (pandas)
ddfdual_weight = pd.merge(ddfdual, ddfdmnd, on=['sy', 'nd_id', 'run_id'])
ddfdual_weight['price_dmnd'] = ddfdual_weight.price * ddfdual_weight.dmnd
ddfdual_weight = ddfdual_weight.join(df_tm_soy_full.set_index('sy')[['hour', 'dow_type', 'season2']], on='sy')
ddfdual_weight = ddfdual_weight.loc[ddfdual_weight.dow_type == 'WEEKDAY']
tm_cols = ['hour']
ddfdual_weight = ddfdual_weight.groupby(['run_id'] + tm_cols)[['price_dmnd', 'dmnd']].sum().reset_index()
dfdual_weight = ddfdual_weight.compute() if pddd == dd else ddfdual_weight
dfdual_weight['price_weight'] = dfdual_weight.price_dmnd / dfdual_weight.dmnd
dfdual_weight['price_weight_shifted'] = dfdual_weight.groupby(['run_id'] + list(set(tm_cols) - {'hour'})).price_weight.apply(lambda x: x - x.mean())
dfdual_weight['price_weight_scaled'] = dfdual_weight.groupby(['run_id'] + list(set(tm_cols) - {'hour'})).price_weight.apply(lambda x: x / x.mean())
dfdual_weight = mps.id_to_name(dfdual_weight[['price_weight','price_weight_shifted', 'price_weight_scaled',
'price_dmnd', 'dmnd', 'run_id'] + tm_cols])
dfdual_weight['season_yr'] = (#dfdual_weight.season2 + '_' +
dfdual_weight.swfy_vl)
dfdual_weight.to_csv('DATA_OUT_CSV/table_price_profiles.csv')
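The demand-weighted price profile written above is
$$\bar p_h = \frac{\sum_{n,\,t\in h} p_{n,t}\,d_{n,t}}{\sum_{n,\,t\in h} d_{n,t}},$$
with supply-dual prices $p_{n,t}$ and demand $d_{n,t}$ summed over nodes $n$ and all time slices $t$ that map to hour of day $h$; price_weight_shifted and price_weight_scaled subtract and divide by the run-wise mean, respectively.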
data = (dfdual_weight.query('swfy_vl in ["yr2015", "yr2035", "yr2050"] '
' and swst_vl == "000%"')
.sort_values(['swfy_vl', 'hour', 'swfz_vl', 'run_id']))
fig = px.line(data, x='hour', y='price_weight_scaled', hover_data=['run_id', 'swec_vl', 'swst_vl'],
facet_col='swfz_vl', color='season_yr',
labels={'price_weight_scaled': 'Price normalized by mean (-)',
'hour': 'Hour of the day'})
fig
slct_run_id = def_run.query('swst_vl in ["000%", "010%", "100%"] and swfz_vl == "default" '
' and swfy_vl in ["yr2015", "yr2035", "yr2050"] and swec_vl == "early_bat"')
ddfpwr = ddf_from_parquet('var_sy_pwr*', slct_run_id=slct_run_id.run_id.tolist())
ddfpwr['pt_id'] = ddfpwr.pp_id.replace(mps.dict_plant_2_pp_type_id)
ddfpwr['fl_id'] = ddfpwr.pp_id.replace(mps.dict_plant_2_fuel_id)
ddfpwr = ddfpwr.groupby(['sy', 'fl_id', 'bool_out', 'run_id']).value.sum().reset_index()
dfpwr_0 = ddfpwr.compute()
dfpwr = (mps.id_to_name(dfpwr_0).join(df_tm_soy_full.set_index('sy')[['hour', 'season2', 'dow_type']], on='sy')
.query('dow_type == "WEEKDAY"')
.pivot_table(index=['hour', 'season2', 'run_id', 'bool_out', 'swfy_vl', 'swst_vl', 'fl'],
values='value', aggfunc=np.mean).reset_index()
)
dfpwr.loc[dfpwr.bool_out, 'value'] *= -1
dfpwr['st_season'] = dfpwr.season2 + '_' + dfpwr.swst_vl
dfpwr['fl_bool_out'] = dfpwr.fl + '_' + dfpwr.bool_out.astype(str)
dfpwr.to_csv('DATA_OUT_CSV/table_hourly_dispatch.csv')
def make_plot(qry=''):
return px.area(dfpwr.query('fl not in ["dmnd", "lost_load"] and swfy_vl in ["yr2015", "yr2050"]' + qry),
x='hour', y='value', facet_col='st_season', facet_row='swfy_vl',
line_group='fl_bool_out', color='fl_bool_out',
labels={'value': 'Average power (MW)',
'hour': 'Hour of the day'})
display(make_plot())
display(make_plot(' and fl in ["new_storage", "pumped_hydro"]'))
slct_run_id = def_run.query('swst_vl in ["000%", "010%", "050%", "100%"]').run_id.tolist()
ddfpwr = ddf_from_parquet('var_sy_pwr*', slct_run_id=slct_run_id,
columns=['sy', 'run_id', 'pp_id', 'value', 'bool_out'])
ddfpwr = ddfpwr.loc[ddfpwr.pp_id.isin(mps.pp2pp('STO'))
& ~ddfpwr.bool_out]
ddfpwr['pt_id'] = ddfpwr.pp_id.replace(mps.dict_plant_2_pp_type_id)
ddfpwr = ddfpwr.groupby(['sy', 'pt_id', 'bool_out', 'run_id']).value.sum().reset_index()
dfpwr_0 = ddfpwr.compute()
dfpwr = dfpwr_0.copy()
ddfcap = ddf_from_parquet('par_cap_pwr_leg*', slct_run_id=slct_run_id)
ddfcap = ddfcap.loc[ddfcap.pp_id.isin(mps.pp2pp('STO'))]
dfcap = ddfcap.compute()
dfcap = mps.id_to_name(dfcap)
dfcap = dfcap.groupby(['run_id', 'pt_id']).value.sum().rename('cap')
dfcf = (dfpwr.join(df_tm_soy_full.set_index('sy')['season2'], on='sy')
.loc[~dfpwr.bool_out]
.groupby(['pt_id', 'run_id', 'bool_out'])
.value.sum().rename('sum').reset_index()
.join(dfcap, dfcap.index.names))
dfcf = mps.id_to_name(dfcf)
tm_cols_cf = list(set(df_tm_soy_full.columns) & set(dfcf.columns))
if tm_cols_cf:
dfcf = dfcf.join(df_tm_soy_full.groupby(tm_cols_cf).apply(len).rename('len'), on=tm_cols_cf)
else:
dfcf['len'] = len(df_tm_soy_full) # <- 8760
dfcf['cf'] = dfcf['sum'] / (dfcf['len'] * dfcf.cap)
dfcf.to_csv('DATA_OUT_CSV/table_cf.csv')
fig = px.line(dfcf.query('swst_vl in ["000%", "010%", "050%", ] and swfy_vl <= "yr2050" and swec_vl == "early_phs"').sort_values(['swst_vl', 'swfy_vl', 'swec_vl']),
x='swfy_vl', y='cf', hover_data=['run_id', 'swec_vl'],
facet_col='swfz_vl', line_group='swst_vl', color='pt',
labels={'swfy_vl': 'Year', 'cf': 'Discharging capacity factor (-)'})
fig.update_yaxes(range=[0, 0.12])
import storedisagg
storedisagg.__file__
def run_std(x):
    '''Disaggregate the storage operation of one (group name, time series) pair'''
    name, x = x
    dict_tot = x[['dch', 'chg']].sum().to_dict()
    assert dict_tot['chg'] > 0, str((name, x))
    eff = dict_tot['dch'] / dict_tot['chg']  # realized round-trip efficiency
    std = storedisagg.StDisaggregator(x, eff, 'share', print_progress=False)
    std.run()
    # note: relies on the global group_levels defined before the parallel call
    return (std.df_full_stacked.assign(**dict(zip(group_levels, name))),
            std.df_step_evts.assign(**dict(zip(group_levels, name))))
def get_full_result_tables(results, group_levels, sw_cols, index_cols):
df_step_0 = pd.concat([res[1] for res in results], axis=0, sort=False).assign(is_empty=lambda x: x.time_diff_icd.isna())
df_full_0 = pd.concat([res[0] for res in results], axis=0, sort=False).join(df_step_0.set_index(list(index_cols)).is_empty, on=list(index_cols))
df_step_0 = df_step_0.loc[~df_step_0.is_empty]
df_full_0 = df_full_0.loc[~df_full_0.is_empty]
df_step_0 = df_step_0.assign(comp_echg=lambda x: x.comp_ichg / np.sqrt(x.eff),
comp_edch=lambda x: x.comp_idch * np.sqrt(x.eff))
return df_step_0, df_full_0
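The final assignment converts the component energies from storedisagg's i-prefixed columns (read here as internal) to e-prefixed external charging/discharging energies, assuming the round-trip efficiency $\eta$ is split evenly between the two conversions:
$$E^{e}_{\mathrm{chg}} = \frac{E^{i}_{\mathrm{chg}}}{\sqrt{\eta}}, \qquad E^{e}_{\mathrm{dch}} = E^{i}_{\mathrm{dch}}\,\sqrt{\eta}.$$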
slct_run_id = def_run.query('(swst_vl <= "100%" or swst_vl in ["100%", "150%", "200%", "250%", "300%"])'
' and swfy_vl in ["yr2015", "yr2035", "yr2050"]'
' and swfz_vl == "default"').reset_index(drop=True)
group_levels = sw_cols + ['run_id', 'pt']
index_cols = (set(group_levels) - set(sw_cols)) | set(['nevent'])
dfpwr_0 = ddf_from_parquet('var_sy_pwr_*', slct_run_id=slct_run_id.run_id.tolist(),
columns=['sy', 'pp_id', 'value', 'bool_out', 'run_id'])
dfpwr_0 = dfpwr_0.loc[dfpwr_0.pp_id.isin(mps.pp2pp('STO'))]
dfpwr_0['pt_id'] = dfpwr_0.pp_id.replace(mps.dict_plant_2_pp_type_id)
dfpwr_0 = dfpwr_0.groupby(['sy', 'pt_id', 'bool_out', 'run_id'])[['value']].sum()
dfpwr_0 = mps.id_to_name(dfpwr_0.compute().reset_index())
dfpwr = dfpwr_0.copy()
dfpwr_gp = (dfpwr.pivot_table(index=sw_cols + ['run_id', 'pt', 'sy'],
columns='bool_out', values='value', aggfunc='sum')
.rename(columns={False: 'dch', True: 'chg'})
.reset_index('sy')
.rename(columns={'sy': 't'}))
# skip NEW_STO in case capacity is zero
dfpwr_gp = dfpwr_gp.loc[~((dfpwr_gp.index.get_level_values('swst_vl').isin(['000%', '0.0%']))
& (dfpwr_gp.index.get_level_values('pt') == 'NEW_STO'))]
list_gp = list(dfpwr_gp.groupby(level=group_levels))
from multiprocessing import Pool
storedisagg.logger.setLevel('ERROR')
with Pool(24) as pool:
results = pool.map(run_std, list_gp)
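# Serial fallback, a sketch for debugging without a worker pool
# (functionally equivalent to the Pool.map call above, just slower):
# results = [run_std(gp) for gp in list_gp]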
df_step_0, df_full_0 = get_full_result_tables(results, group_levels, sw_cols, index_cols)
sys.exit() # accidental overwrite protection
df_step_0.to_parquet('df_step_0_disagg_analysis.parquet', compression='gzip')
df_full_0.to_parquet('df_full_0_disagg_analysis.parquet', compression='gzip')
df_step_0 = pd.read_parquet('df_step_0_disagg_analysis.parquet')
df_full_0 = pd.read_parquet('df_full_0_disagg_analysis.parquet')
df_step_0 = df_step_0[group_levels + ['nevent', 'iteration', 'comp_ichg', 'comp_idch', 'comp_echg', 'comp_edch',
'wgt_center_erg_ichg', 'time_diff_icd']].reset_index(drop=True)
df_step_0['bin_coarse'] = pd.cut(df_step_0.time_diff_icd, [0, 6, 12, 24, 48, 168, 10000]).astype(str)
df_step_0['bin_fine'] = pd.cut(df_step_0.time_diff_icd, np.arange(-1 / 2, 500.5, 1)).apply(lambda x: x.mid).astype(float).fillna(500)
df_step_0['comp_sy_chg'] = df_step_0.wgt_center_erg_ichg.round().astype(int)
df_step_0.drop('time_diff_icd', axis=1, inplace=True)
df_step_0 = df_step_0.join(df_tm_soy_full.rename(columns={'sy': 'comp_sy_chg'}
).set_index('comp_sy_chg')[['season2']], on='comp_sy_chg')
df_full_0 = df_full_0.join(df_step_0.set_index(['pt', 'run_id', 'nevent'])[['bin_coarse', 'bin_fine']],
on=['pt', 'run_id', 'nevent'])
df_full = df_full_0.copy()
df_step = df_step_0.copy()
df_step_hist = df_step.groupby(group_levels + ['bin_fine']).comp_ichg.sum().fillna(0).reset_index()
df_step_hist['comp_ichg_norm'] = df_step_hist.groupby(list(set(index_cols) - set(['nevent']))).comp_ichg.apply(lambda x: x/x.sum())
df_step_hist.to_csv('DATA_OUT_CSV/table_step_hist.csv')
fig = px.bar(df_step_hist.query('swfz_vl == "default" '
                                ' and pt in ["HYD_STO"] '
                                ' and swst_vl in ["000%", "010%"] '
                                ' and swfy_vl in ["yr2015", "yr2035", "yr2050"]'
).assign(year_ec=df_step_hist[['swfy_vl', 'swec_vl']].apply(tuple, axis=1),
pt_year=df_step_hist[['pt', 'swfy_vl']].apply(tuple, axis=1),
pt_season=df_step_hist[['pt']].apply(tuple, axis=1)),
x='bin_fine', y='comp_ichg', hover_data=['pt', 'swec_vl'],
facet_col='pt_year', facet_row='swst_vl', #line_group='swfy_vl',
color='swec_vl', height=1000)
fig.update_layout(barmode='overlay', xaxis={'range': (0, 48)})
fig.update_traces(opacity=0.5)
df_coarse = (df_step.query('bin_coarse != "nan"')
.groupby(sw_cols + ['run_id', 'bin_coarse', 'pt'])[['comp_ichg']].sum()
.reset_index().sort_values('run_id')
.assign(comp_ichg_norm=lambda df: df.groupby(['run_id', 'pt']).comp_ichg.apply(lambda x: x / x.sum())))
df_coarse.to_csv('DATA_OUT_CSV/table_disagg_coarse.csv')
df_coarse['pt_ec'] = df_coarse[['pt', 'swec_vl']].apply('_'.join, axis=1)
df_coarse = df_coarse.sort_values('pt_ec')
fig = px.bar(df_coarse.query('swfz_vl == "default"'
' and swfy_vl in ["yr2015", "yr2035", "yr2050"]'
).sort_values(['pt_ec', 'swfy_vl', 'run_id']),
x='swst_vl', y='comp_ichg_norm', facet_col='pt_ec', facet_row='swfy_vl',
color='bin_coarse', height=500)
fig.update_yaxes(matches=None)
(
df_step.groupby(['run_id', 'pt'])[['comp_ichg', 'comp_idch']].sum(),
df_full.groupby(['run_id', 'pt'])[['ichg', 'idch']].sum(),
dfpwr_gp.query('run_id == 0 and pt == "HYD_STO"').sum() * np.sqrt(0.75)
)
#df_step.query('run_id == 0 and pt == "HYD_STO"').comp_ichg.sum()
# check total binned values
dfnet = dfpwr.pivot_table(index=['sy', 'run_id', 'pt'], columns=['bool_out'],
aggfunc=sum, values='value'
                             ).reset_index().assign(eff=lambda x: x.pt.replace({'HYD_STO': 0.75, 'NEW_STO': 0.9}),
net=lambda x: x[False] / np.sqrt(x.eff) - x[True] * np.sqrt(x.eff))
#dfnet[False] = dfnet.net.where(dfnet.net > 0, 0)
#dfnet[True] = - dfnet.net.where(dfnet.net < 0, 0)
#
#dfnet = dfnet.groupby(['run_id', 'pt'])[[True, False]].sum()
dfnet.groupby(['pt', 'run_id'])[[True, False]].sum()
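In formula form, the check reconstructs the net internal storage power from external discharging ($p^{\mathrm{dch}}$, bool_out == False) and charging ($p^{\mathrm{chg}}$, bool_out == True) with the assumed round-trip efficiencies $\eta$ (0.75 for PHS, 0.9 for BAT):
$$p^{\mathrm{net}}_t = \frac{p^{\mathrm{dch}}_t}{\sqrt{\eta}} - p^{\mathrm{chg}}_t\,\sqrt{\eta}.$$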
#with print_full:
# display(
# pd.merge(
# df_step.groupby(['run_id', 'pt']).comp_idch.sum().reset_index(),
# dfnet.reset_index(),
# on=['run_id', 'pt']).assign(dev=lambda y: y.apply(lambda x: f'{(x.comp_idch - x[False]) / x[False] * 100:.2f}%', axis=1))
# )
df_full_plot = (df_full.loc[(df_full.swfz_vl == "default")
& (df_full.swst_vl.isin(["000%", "010%", "050%", "100%", "200%", "300%"]))]
.rename(columns={'t_orig': 'sy'})
.join(df_tm_soy_full.set_index('sy')[['hour', 'how']], on='sy'))
group_cols = ['hour', 'run_id', 'pt', 'bin_coarse'] + sw_cols
df_full_plot_hourly = (df_full_plot.groupby(group_cols)[['ichg', 'idch']]
.sum().stack().rename('value').reset_index().rename(columns={f'level_{len(group_cols)}': 'cd'}))
group_cols_weekly = ['how', 'run_id', 'pt', 'bin_coarse'] + sw_cols
df_full_plot_weekly = (df_full_plot.groupby(group_cols_weekly)[['ichg', 'idch']]
                       .sum().stack().rename('value').reset_index().rename(columns={f'level_{len(group_cols_weekly)}': 'cd'}))
df_full_plot_hourly.to_csv('DATA_OUT_CSV/table_df_full_for_patterns_hourly.csv')
df_full_plot_weekly.to_csv('DATA_OUT_CSV/table_df_full_for_patterns_weekly.csv')
fig = px.line(df_full_plot_hourly.query('swst_vl == "010%" and pt == "NEW_STO"'),# and swst_vl in ["000%", "010%", "100%"]'),
x='hour', y='value',
hover_data=['pt'], facet_col='swfy_vl', facet_row='bin_coarse',
line_group='swec_vl', color='cd', height=1000)
fig.update_yaxes(matches=None)
df_full_coarse_comp = (df_full_0.groupby(['t_orig', 'run_id', 'bin_coarse', 'pt'])[['ichg', 'idch']]
.sum().reset_index().rename(columns={'t_orig': 'sy'}))
ddfpwr_VRE = ddf_from_parquet('var_sy_pwr*', columns=['sy', 'pp_id', 'run_id', 'value'],
slct_run_id=slct_run_id.run_id.tolist())
ddfpwr_VRE = ddfpwr_VRE.loc[ddfpwr_VRE.pp_id.isin(mps.pp2pp('WIN|SOL'))]
ddfpwr_VRE['pt_id'] = ddfpwr_VRE.pp_id.replace(mps.dict_plant_2_pp_type_id)
ddfpwr_VRE = ddfpwr_VRE.groupby(['pt_id', 'sy', 'run_id']).value.sum().rename('pwr').reset_index()
dfpwr_VRE_0 = ddfpwr_VRE.compute()
dfpwr_VRE = (dfpwr_VRE_0.assign(pt=lambda x: x.pt_id.replace(mps.dict_pt))
.pivot_table(index=['run_id', 'sy'], values='pwr', columns='pt'))
dfpwr_VRE['wind_total'] = dfpwr_VRE.WIN_OFF + dfpwr_VRE.WIN_ONS
dfpwr_VRE = dfpwr_VRE.drop(['WIN_ONS', 'WIN_OFF'], axis=1).rename(columns={'SOL_PHO': 'solar'}).reset_index()
df_full_coarse_comp = mps.id_to_name(df_full_coarse_comp.join(dfpwr_VRE.set_index(['sy', 'run_id']), on=['sy', 'run_id']))
# get correlations
df_full_coarse_comp = df_full_coarse_comp.groupby(['run_id', 'bin_coarse', 'pt'])[['wind_total', 'solar', 'ichg', 'idch']].corr()
x = mps.id_to_name(df_full_coarse_comp.reset_index('run_id'))
x.index = x.index.rename(['pt', 'bin_coarse', 'profile'])
x.to_csv('DATA_OUT_CSV/table_correlation_chgdch_vre.csv')
x = x.reset_index()
x['year_profile'] = x[['swfy_vl', 'profile']].apply('_'.join, axis=1)
fig = px.line(x.query('profile not in ["idch", "ichg"] and swec_vl == "early_phs" and swfy_vl in ["yr2020", "yr2035"]'
).sort_values(['swst_vl', 'swfy_vl']),
x='swst_vl', y='ichg', facet_col='year_profile', color='pt',
facet_row='bin_coarse', height=600)
fig
Assess the marginal price differences/ratios between charging and discharging. This requires the storage operation disaggregation.
We are interested in the dependence on BAT penetration for otherwise default parameter configurations:
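Concretely, the metric computed below is the ratio of the power-weighted average marginal costs during charging and discharging of each disaggregated component,
$$r_{\mathrm{mc}} = \left.\frac{\sum_t mc_t\, p^{\mathrm{chg}}_t}{\sum_t p^{\mathrm{chg}}_t}\right/ \frac{\sum_t mc_t\, p^{\mathrm{dch}}_t}{\sum_t p^{\mathrm{dch}}_t};$$
storage arbitrage only breaks even if $r_{\mathrm{mc}}$ stays below the round-trip efficiency (0.75 for PHS, 0.9 for BAT in this notebook), the thresholds used in the commented validity check further below.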
slct_run_id = def_run.query('swst_vl <= "300%"'
' and swfy_vl in ["yr2015", "yr2020", "yr2025", "yr2030", "yr2035"]'
' and swec_vl == "early_phs"'
' and swfz_vl == "default"').reset_index(drop=True)
slct_run_id
dfpwr_0 = ddf_from_parquet('var_sy_pwr_*', slct_run_id=slct_run_id.run_id.tolist(),
columns=['sy', 'pp_id', 'value', 'bool_out', 'run_id'])
dfpwr_0 = dfpwr_0.loc[dfpwr_0.pp_id.isin(mps.pp2pp('STO'))]
dfpwr_0 = dfpwr_0.compute()
dfpwr = dfpwr_0.copy()
dfpwr_gp = (dfpwr.pivot_table(index=['run_id', 'pp_id', 'sy'],
columns='bool_out', values='value', aggfunc='sum')
.rename(columns={False: 'dch', True: 'chg'})
.reset_index('sy')
.rename(columns={'sy': 't'}))
# skip NEW_STO in case capacity is zero
list_run_id_swst_zero = def_run.query('swst_vl == "000%"').run_id.tolist()
dfpwr_gp = dfpwr_gp.loc[~((dfpwr_gp.index.get_level_values('run_id').isin(list_run_id_swst_zero))
& (dfpwr_gp.index.get_level_values('pp_id').isin(mps.pp2pp('NEW_STO'))))]
# perform the disaggregation for each *plant* separately, i.e. don't aggregate all PHS
# plants in the five countries
group_levels = ['run_id', 'pp_id']
list_gp = list(dfpwr_gp.groupby(level=group_levels))
# parallel disaggregation of the storage operation by model run and power plant id
from multiprocessing import Pool
storedisagg.logger.setLevel('ERROR')
with Pool(24) as pool:
results = pool.map(run_std, list_gp)
# aggregate results
index_cols = (set(group_levels) - set(sw_cols)) | set(['nevent'])
df_step_0, df_full_0 = get_full_result_tables(results, group_levels, sw_cols, index_cols)
df_full_0['nd_id'] = df_full_0.pp_id.replace(mps.dict_plant_2_node_id)
sys.exit() # accidental overwrite protection
df_step_0.to_parquet('df_step_0_price_differences.parquet', compression='gzip')
df_full_0.to_parquet('df_full_0_price_differences.parquet', compression='gzip')
# load previous results
df_step_0 = pd.read_parquet('df_step_0_price_differences.parquet')
df_full_0 = pd.read_parquet('df_full_0_price_differences.parquet')
# stack the full output table by charging/discharging (these are separate columns in the storedisagg results)
df_full_melt = df_full_0.melt(id_vars=['t_orig', 'nevent', 'nd_id', 'run_id', 'pp_id'],
value_vars=['ichg', 'idch'], value_name='pwr', var_name='chgdch')
# add marginal cost to the full output table
dfmc = ddf_from_parquet('dual_supply*', slct_run_id=slct_run_id.run_id.tolist()).compute()
dfmc = dfmc.rename(columns={'sy': 't_orig'}).set_index(['nd_id', 't_orig', 'run_id']).value.rename('mc')
df_full_mc = df_full_melt.join(dfmc, on=dfmc.index.names)
df_full_mc.head()
# calculate product of prices `mc` and chg/dch power for later calculation of weighted average mc
df_full_mc_avg = (df_full_mc.assign(mc_pwr=df_full_mc.pwr * df_full_mc.mc)
.groupby(['pp_id', 'chgdch', 'run_id', 'nevent'])[['mc_pwr', 'pwr']].sum()
.reset_index())
df_full_mc_avg['mc_weighted'] = df_full_mc_avg.mc_pwr / df_full_mc_avg.pwr
df_full_mc_avg = mps.id_to_name(df_full_mc_avg)
df_full_mc_avg.head()
df_full_mc_avg_ratio = df_full_mc_avg.pivot_table(index=['pp_id', 'run_id', 'nevent'],
columns='chgdch', values=['mc_weighted', 'pwr'], aggfunc=sum)
df_full_mc_avg_ratio.columns = ['_'.join(c) for c in df_full_mc_avg_ratio.columns]
df_full_mc_avg_ratio = df_full_mc_avg_ratio.query('pwr_idch > 0.01')
df_full_mc_avg_ratio = df_full_mc_avg_ratio.assign(ratio_mc=lambda df: df.mc_weighted_ichg / df.mc_weighted_idch)
df_full_mc_avg_ratio = mps.id_to_name(df_full_mc_avg_ratio.reset_index())
display(df_full_mc_avg_ratio.head())
## some components (`nevent`) discharge at invalid prices. They are associated with numerical inaccuracies and
## negligible component energies. The code below identifies these cases and draws a scatter plot price ratio vs energy.
#df_full_mc_avg_ratio['invalid'] = (((df_full_mc_avg_ratio.pt == "HYD_STO")
# & (df_full_mc_avg_ratio.ratio_mc > 0.75)) |
# ((df_full_mc_avg_ratio.pt == "NEW_STO")
# & (df_full_mc_avg_ratio.ratio_mc > 0.9)))
#
#px.scatter(df_full_mc_avg_ratio, x='pwr_ichg', y='ratio_mc', color='pt', facet_col='pt')
df_full_mc_avg_ratio_agg = (df_full_mc_avg_ratio.groupby(['pt', 'run_id'])
.apply(lambda df: (df.ratio_mc * df.pwr_idch).sum() / df.pwr_idch.sum())
.rename('ratio_mc').reset_index())
df_full_mc_avg_ratio_agg = mps.id_to_name(df_full_mc_avg_ratio_agg)
fig = px.line(df_full_mc_avg_ratio_agg.sort_values('swst_vl'), x='swst_vl', y='ratio_mc',
facet_col='swfy_vl', color='pt', height=400)
fig
#fig = px.scatter(df_full_mc_avg_ratio.query('nd == "CH0"'), x='swst_vl', y='ratio_mc',
# facet_col='swfy_vl', color='pt', height=400)
#fig
#fig.update_layout(barmode='overlay')
#fig.update_traces(opacity=0.5)
slct_run_id = def_run.query('swfz_vl == "default" and swec_vl == "early_bat" and swfy_vl == "yr2015"')#.index.tolist()
slct_run_id = slct_run_id.run_id.tolist()
slct_pp_id = list(mps.pp2pp('STO'))
df_dd = mps.id_to_name(ddf_from_parquet('par_discharge_duration*', slct_run_id=slct_run_id).query(f'pp_id in {slct_pp_id}').compute())
df_cap = mps.id_to_name(ddf_from_parquet('par_cap_pwr_leg*', slct_run_id=slct_run_id).query(f'pp_id in {slct_pp_id}').compute())
df_erg = mps.id_to_name(ddf_from_parquet('var_yr_cap_erg_tot*', slct_run_id=slct_run_id).query(f'pp_id in {slct_pp_id}').compute())
df_dmnd = mps.id_to_name(ddf_from_parquet('par_dmnd*', slct_run_id=slct_run_id).compute()).query('swst_vl == "000%"')
# PHS capacity as fraction of national demand
dmnd_agg = df_dmnd.value.sum()
df_erg_phs = df_erg.query('swst_vl == "100%"').groupby(['pt']).value.sum().rename('erg').reset_index()
df_cap_phs = df_cap.query('swst_vl == "100%"').groupby(['pt']).value.sum().rename('cap').reset_index()
pd.merge(df_cap_phs, df_erg_phs).assign(avg_dmnd=lambda x: dmnd_agg / 8760,
share_pwr=lambda x: x.cap / x.avg_dmnd * 100,
share=lambda x: x.erg / dmnd_agg * 100,
discharge_dur=lambda x: x.erg/x.cap)
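In formula form, with $D$ the total annual demand (summed over nodes and hours), $C$ the power capacity, and $E$ the annual discharged energy:
$$\mathrm{share\_pwr} = 100\,\frac{C}{D/8760}, \qquad \mathrm{share} = 100\,\frac{E}{D}, \qquad \mathrm{discharge\_dur} = \frac{E}{C}.$$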
slct_run_id = def_run.query('swfz_vl == "default" and swec_vl == "early_bat" and swst_vl == "000%"').run_id.tolist()
df_cap = mps.id_to_name(ddf_from_parquet('par_cap_pwr_leg*', slct_run_id=slct_run_id).query(f'pp_id in {list(mps.pp2pp("HYD_STO"))}').compute())
df_cap.pivot_table(index='swfy_vl', columns='pp', values='value').plot()
# Read all hourly power
ddf_var_sy_pwr = ddf_from_parquet('var_sy_pwr*')
# Aggregate energy over the whole year
df_pp_sum = ddf_var_sy_pwr.groupby(['run_id', 'pp_id', 'bool_out']).value.sum().compute().reset_index()
# Add time columns to hourly data
df_tm = pd.read_parquet(sc_out + 'tm_soy_full.parq').set_index('sy')[['hour']]
ddf_var_sy_pwr_hourly = ddf_var_sy_pwr.join(df_tm, on='sy')
# Group and sum by hour-of-the-day
group_cols = ['pp_id', 'bool_out', 'hour', 'ca_id', 'run_id']
ddf_var_sy_pwr_hourly = (ddf_var_sy_pwr_hourly.groupby(group_cols).value.sum())
df_var_sy_pwr_hourly = ddf_var_sy_pwr_hourly.compute()
df_var_sy_pwr_hourly = mps.id_to_name(df_var_sy_pwr_hourly.reset_index())
df_var_sy_pwr_hourly.head()
# Read power plant capacities
ddf_cap_sum = ddf_from_parquet('var_yr_cap_pwr_tot*')
df_cap_sum = ddf_cap_sum.compute()
# Read storage energy capacity
ddf_caperg_sum = ddf_from_parquet('var_yr_cap_erg_tot*')
# Read discharge duration parameter
ddf_dischdur = ddf_from_parquet('par_disch*')
# Merge energy capacity and discharge duration
ddf_caperg_sum = dd.merge(ddf_caperg_sum, ddf_dischdur, on=['pp_id', 'ca_id', 'run_id'],
suffixes=('caperg', 'discharge'), how='left')
df_caperg_sum = ddf_caperg_sum.compute()
# Translate ids of power plant, node, run, etc to corresponding names
df_pp_sum = mps.id_to_name(df_pp_sum.reset_index())
df_cap_sum = mps.id_to_name(df_cap_sum.reset_index())
df_caperg_sum = mps.id_to_name(df_caperg_sum)
df_pp_sum.head()
slct_run_id = def_run.loc[def_run.swfz_vl.isin(['default'])
& def_run.swst_vl.isin(['000%'])
& def_run.swec_vl.isin(['early_phs'])].run_id.tolist()
df_pp_sum_default = df_pp_sum.loc[df_pp_sum.run_id.isin(slct_run_id)]
df_cap_sum_default = df_cap_sum.loc[df_cap_sum.run_id.isin(slct_run_id)]
df_pp_sum_default.loc[df_pp_sum_default.bool_out, 'value'] *= -1
data = df_pp_sum_default.assign(value=df_pp_sum_default.value / 1e6)
fig = px.bar(data.sort_values(['swfy_vl', 'nd']), x='swfy_vl', y='value', hover_data=['pt'],
facet_col='nd', color='fl',
labels={'value': 'Energy (TWh/yr)', 'swfy_vl': 'Year'})
fig.update_yaxes(matches=None)
data = df_cap_sum_default.assign(value=df_cap_sum_default.value / 1e3)
fig = px.bar(data.query('pt != "LOL_LIN"').sort_values(['swfy_vl', 'nd']),
x='swfy_vl', y='value', hover_data=['pt'],
facet_col='nd', color='fl',
labels={'value': 'Installed capacity (GW)', 'swfy_vl': 'Year'})
fig.update_yaxes(matches=None)
slct_run_id = def_run.loc[def_run.swfz_vl.isin(['default'])
& def_run.swec_vl.isin(['early_phs'])
& def_run.swfy_vl.isin(['yr2015', 'yr2050'])].run_id.tolist()
print(slct_run_id)
df_pp_sum_swst = df_pp_sum.loc[df_pp_sum.run_id.isin(slct_run_id)]
df_pp_sum_swst.loc[df_pp_sum_swst.bool_out, 'value'] *= -1
df_pp_sum_swst
## Impact by swst_vl
join_cols = list(set(sw_cols) - {'swst_vl'} | {'bool_out', 'pp_id'})
data = df_pp_sum_swst.query('bool_out == False').assign(value=lambda x: x.value / 1e6)
data_ref = data.loc[data.swst_vl == '000%'].set_index(join_cols).value.rename('value_ref')
data = data.join(data_ref, on=join_cols).assign(value_diff=lambda x: x.value - x.value_ref)
data
fig = px.bar(data, x='swst_vl', y='value_diff',
             hover_data=['fl', 'bool_out', 'run_id'] + sw_cols,
             facet_col='nd', facet_row='swfy_vl', color='fl',
             labels={'value_diff': 'Discharged energy difference (TWh/yr)', 'swst_vl': 'New storage penetration (%)'})
fig.update_yaxes(matches=None)
frz_cap case ("frozen capacity", only fuel prices are changed)
slct_run_id = def_run.query('swst_vl in ["000%", "050%"] '
' and swfy_vl in ["yr2015", "yr2030", "yr2050"]').run_id.tolist()
dfcap = (ddf_from_parquet('par_cap_pwr_leg*', slct_run_id=slct_run_id)
.assign(pt_id=lambda x: x.pp_id.replace(mps.dict_plant_2_pp_type_id))
.groupby(['run_id', 'pt_id']).value.sum().reset_index()
.compute())
dfcap = dfcap.loc[~dfcap.pt_id.isin(mps.pt2pt('LOL'))]
dfcap = mps.id_to_name(dfcap)
dfcap['run_no_fz'] = dfcap[list(set(sw_cols) - {'swfz_vl', 'swst_vl'})].apply(tuple, axis=1).astype(str)
dfcap
px.bar(dfcap, x='run_no_fz', y='value', color='pt', facet_col='swfz_vl', facet_row='swst_vl')
frz_vc case ("frozen prices", only capacities are changed)
slct_run_id = def_run.query('swst_vl in ["000%", "050%"] '
' and swfy_vl in ["yr2015", "yr2030", "yr2050"]'
' and swec_vl in ["early_bat"]').run_id.tolist()
dfvcfl = ddf_from_parquet('par_vc_fl*', slct_run_id=slct_run_id)
dfvcfl = dfvcfl.loc[~dfvcfl.fl_id.isin(mps.fl2fl('lost'))
                    & dfvcfl.nd_id.isin([2])]  # restrict to a single node (nd_id 2)
dfvcfl = dfvcfl.loc[dfvcfl.mt_id == 0].compute()
dfvcfl = mps.id_to_name(dfvcfl)
dfvcfl['run_no_fz'] = dfvcfl[list(set(sw_cols) - {'swfz_vl', 'swst_vl', 'swfy_vl'})].apply(tuple, axis=1).astype(str)
dfvcfl
fig = px.line(dfvcfl, x='swfy_vl', y='value', color='fl',
facet_col='swfz_vl', facet_row='swst_vl')
fig#.update_layout(barmode='group')
slct_run_id = def_run.query('swfy_vl in ["yr2015", "yr2030", "yr2050"]').run_id.tolist()
dfcap = ddf_from_parquet('var_yr_cap_erg_tot*', slct_run_id=slct_run_id)
dfcap = dfcap.loc[dfcap.pp_id.isin(mps.pp2pp('STO'))].compute()
dfcap['pt_id'] = dfcap.pp_id.replace(mps.dict_plant_2_pp_type_id)
dfcap['nd_id'] = dfcap.pp_id.replace(mps.dict_plant_2_node_id)
dfcap = (dfcap.pivot_table(index=['run_id', 'nd_id'], columns='pt_id', values='value')
.assign(value= lambda x: x[mps.dict_pt_id['NEW_STO']] / x[mps.dict_pt_id['HYD_STO']])
.reset_index())[['run_id', 'nd_id', 'value']]
dfcap = mps.id_to_name(dfcap)
px.scatter(dfcap, x='swst_vl', y='value', facet_col='nd', height=300,
labels={'value': 'Ratio energy capacity BAT/PHS',
'swst_vl': 'New storage penetration (%)'})
list_run_id = def_run.query('swst_vl == "000%" and swec_vl == "early_phs" and swfz_vl == "default"').run_id.tolist()
ddfmc = ddf_from_parquet('dual_supply*', columns=['sy', 'nd_id', 'run_id', 'value'], slct_run_id=list_run_id)
ddfmc = ddf_add_tm_cols(ddfmc, ['hour', 'doy', 'season'])
dfmc = ddfmc.loc[ddfmc.nd_id == mps.dict_nd_id['CH0']].compute()
dfmc = mps.id_to_name(dfmc).rename(columns={'value': 'mc'})
ddfresdmnd = ddf_from_parquet('var_sy_pwr*', columns=['sy', 'pp_id', 'run_id', 'value'], slct_run_id=list_run_id)
ddfresdmnd = ddfresdmnd.loc[ddfresdmnd.pp_id.isin(mps.pp2pp('DMND|SOL|WIN') - mps.pp2pp('FLEX'))]
ddfresdmnd = ddf_add_tm_cols(ddfresdmnd, ['hour', 'doy', 'season'])
ddfresdmnd['pt_id'] = ddfresdmnd.pp_id.replace(mps.dict_plant_2_pp_type_id)
ddfresdmnd = ddfresdmnd.groupby(['sy', 'hour', 'doy', 'season', 'pt_id', 'run_id']).value.sum()
dfresdmnd = mps.id_to_name(ddfresdmnd.compute().reset_index())
dfresdmnd = dfresdmnd.assign(ptype=lambda x: x.pt.replace({'WIN_ONS': 'VRE', 'WIN_OFF': 'VRE', 'SOL_PHO': 'VRE'}))
dfresdmndpv = dfresdmnd.pivot_table(index=['swfy_vl', 'hour', 'season', 'doy'], columns='ptype',
values='value', aggfunc=[sum, len]
)
assert tuple(dfresdmndpv['len'].max()) == (1,3), 'aggregating too many'
dfresdmndpv = dfresdmndpv['sum'].reset_index()
dfresdmndpv['res_dmnd'] = dfresdmndpv.DMND - dfresdmndpv.VRE
dfresdmndpv.drop(['DMND', 'VRE'], axis=1, inplace=True)
fig = px.box(dfresdmndpv, x='hour', y="res_dmnd", facet_col="swfy_vl", facet_row='season')
fig
fig = px.box(dfmc.sort_values('swfy_vl'), x='hour', y="mc", facet_col="swfy_vl", facet_row='season')
fig
# select representative night, morning, day, and evening time slots in line with the profile shapes above
dict_slots = {'nig': [2,3], 'mor': [7,8], 'day': [11, 12], 'eve': [17, 18]}
# calculate average residual demand based on hour selection in dict_slots
dfgp = dfresdmndpv[['doy', 'hour', 'swfy_vl', 'res_dmnd']].groupby(['doy', 'swfy_vl'])
def get_average_resdmnd(x):
return pd.Series({slot: x.set_index('hour').loc[list_hour].res_dmnd.mean() for slot, list_hour in dict_slots.items()})
dfresdmnd_avg = dfgp.apply(get_average_resdmnd).reset_index()
def calc_ratio_diff(x, s1, s2, diff_ratio):
return x[s1] / x[s2] if diff_ratio == 'ratio' else x[s1] - x[s2]
dfresdmnd_avg = dfresdmnd_avg.assign(**{f'{diff_ratio}_{s1}_{s2}':
functools.partial(calc_ratio_diff, s1=s1, s2=s2, diff_ratio=diff_ratio)
for s1, s2 in [('mor', 'nig'), ('day', 'nig'), ('eve', 'nig')]
for diff_ratio in ['diff', 'ratio']
                                               if s1 != s2})
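# Toy check of the helper on synthetic numbers (not model data):
# calc_ratio_diff(pd.DataFrame({'day': [100.0], 'nig': [80.0]}), 'day', 'nig', 'ratio')
# returns a Series with value 1.25; with diff_ratio='diff' the value is 20.0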
dfresdmnd_avg = (dfresdmnd_avg.join(df_tm_soy_full[['doy', 'dow_type', 'dow', 'season']]
.drop_duplicates()
.set_index('doy'), on='doy'))
fig = px.box(dfresdmnd_avg, x='season', y="ratio_day_nig", facet_col='swfy_vl')
fig
import matplotlib.pyplot as plt
import seaborn as sns
def hexbin(x, y, color, **kwargs):
    plt.hexbin(x, y, gridsize=20, **kwargs)
g = sns.FacetGrid(dfresdmnd_avg, col="dow_type", height=6, sharex=False, sharey=False)
g.map(hexbin, "diff_mor_nig", "diff_day_nig")
g = sns.FacetGrid(dfresdmnd_avg, col="season", height=6, sharex=False, sharey=False)
g.map(hexbin, "diff_mor_nig", "diff_day_nig")
fig = make_subplots(rows=2, cols=2)
dict_slct = {(1, 1): ('Ratio day/night', 'ratio_day_nig'),
(2, 1): ('Difference day - night', 'diff_day_nig'),
(1, 2): ('Ratio morning/night', 'ratio_mor_nig'),
             (2, 2): ('Difference morning - night', 'diff_mor_nig')}
for (col, row), (xname, datacol) in dict_slct.items():
    for dt in ['SUN', 'SAT', 'WEEKDAY']:
        x = dfresdmnd_avg.query(f'dow_type == "{dt}"')
fig.add_trace(go.Histogram(x=x[datacol], name=dt), col=col, row=row)
fig.update_xaxes(title_text=xname, row=row, col=col)
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.3)
fig.show()